{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5.7 使用重复元素的网络（VGG）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.0.0-rc1\n",
      "是否使用cuda: True\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import time\n",
    "import paddle\n",
    "from paddle import nn, optimizer\n",
    "\n",
    "print(paddle.__version__)\n",
    "print(\"是否使用cuda:\", paddle.is_compiled_with_cuda())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.7.1 VGG块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/iie/miniconda3/envs/yolov5/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
      "  and should_run_async(code)\n"
     ]
    }
   ],
   "source": [
    "def vgg_block(num_convs, in_channels, out_channels):\n",
    "    blk = []\n",
    "    for i in range(num_convs):\n",
    "        if i == 0:\n",
    "            blk.append(nn.Conv2D(in_channels, out_channels, kernel_size=3, padding=1))\n",
    "        else:\n",
    "            blk.append(nn.Conv2D(out_channels, out_channels, kernel_size=3, padding=1))\n",
    "        blk.append(nn.ReLU())\n",
    "    blk.append(nn.MaxPool2D(kernel_size=2, stride=2))\n",
    "    return nn.Sequential(*blk)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.7.2 VGG网络"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))\n",
    "fc_features = 512 * 7 * 7 # 根据卷积层的输出算出来的\n",
    "fc_hidden_units = 4096 # 任意"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FlattenLayer(nn.Layer):\n",
    "    def __init__(self):\n",
    "        super(FlattenLayer, self).__init__()\n",
    "    def forward(self, x): # x shape: (batch, *, *, ...)\n",
    "        return x.reshape((x.shape[0], -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def vgg(conv_arch, fc_features, fc_hidden_units=4096):\n",
    "    net = nn.Sequential()\n",
    "    # 卷积层部分\n",
    "    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):\n",
    "        net.add_sublayer(\"vgg_block_\" + str(i+1), vgg_block(num_convs, in_channels, out_channels))\n",
    "    # 全连接层部分\n",
    "    net.add_sublayer(\"fc\", nn.Sequential(\n",
    "                                 FlattenLayer(),\n",
    "                                 nn.Linear(fc_features, fc_hidden_units),\n",
    "                                 nn.ReLU(),\n",
    "                                 nn.Dropout(0.5),\n",
    "                                 nn.Linear(fc_hidden_units, fc_hidden_units),\n",
    "                                 nn.ReLU(),\n",
    "                                 nn.Dropout(0.5),\n",
    "                                 nn.Linear(fc_hidden_units, 10)\n",
    "                                ))\n",
    "    return net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "vgg_block_1 output shape:  [1, 64, 112, 112]\n",
      "vgg_block_2 output shape:  [1, 128, 56, 56]\n",
      "vgg_block_3 output shape:  [1, 256, 28, 28]\n",
      "vgg_block_4 output shape:  [1, 512, 14, 14]\n",
      "vgg_block_5 output shape:  [1, 512, 7, 7]\n",
      "fc output shape:  [1, 10]\n"
     ]
    }
   ],
   "source": [
    "net = vgg(conv_arch, fc_features, fc_hidden_units)\n",
    "X = paddle.rand((1, 1, 224, 224))\n",
    "\n",
    "# named_children获取一级子模块及其名字(named_modules会返回所有子模块,包括子模块的子模块)\n",
    "for name, blk in net.named_children(): \n",
    "    X = blk(X)\n",
    "    print(name, 'output shape: ', X.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<paddle.fluid.dygraph.container.Sequential object at 0x7f9492223220>\n"
     ]
    }
   ],
   "source": [
    "ratio = 8\n",
    "small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio), \n",
    "                   (2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]\n",
    "net = vgg(small_conv_arch, fc_features // ratio, fc_hidden_units // ratio)\n",
    "print(net)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.7.3 获取数据和训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import paddle.vision.datasets as datasets\n",
    "import paddle.vision.transforms as transforms\n",
    "import sys\n",
    "\n",
    "\n",
    "def load_data_fashion_mnist(batch_size, resize=None):\n",
    "    \"\"\"Download the fashion mnist dataset and then load into memory.\"\"\"\n",
    "    trans = []\n",
    "    if resize:\n",
    "        trans.append(transforms.Resize(size=resize))\n",
    "    trans.append(transforms.ToTensor())\n",
    "    \n",
    "    transform = transforms.Compose(trans)\n",
    "    mnist_train = datasets.FashionMNIST(mode='train', download=True, transform=transform)\n",
    "    mnist_test = datasets.FashionMNIST(mode='test', download=True, transform=transform)\n",
    "    if sys.platform.startswith('win'):\n",
    "        num_workers = 0  # 0表示不用额外的进程来加速读取数据\n",
    "    else:\n",
    "        num_workers = 0\n",
    "    train_iter = paddle.io.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)\n",
    "    test_iter = paddle.io.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)\n",
    "\n",
    "    return train_iter, test_iter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_accuracy(data_iter, net):\n",
    "\n",
    "    acc_sum, n = 0.0, 0\n",
    "    with paddle.no_grad():\n",
    "        for X, y in data_iter:\n",
    "            if isinstance(net, nn.Layer):\n",
    "                net.eval() # 评估模式, 这会关闭dropout\n",
    "                acc_sum += (net(X).argmax(axis=1) == y.flatten()).astype('float32').sum().numpy()[0]\n",
    "                net.train() # 改回训练模式\n",
    "            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU\n",
    "                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数\n",
    "                    # 将is_training设置成False\n",
    "                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() \n",
    "                else:\n",
    "                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() \n",
    "            n += y.shape[0]\n",
    "    return acc_sum / n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_ch5(net, train_iter, test_iter, batch_size, optimi, num_epochs):\n",
    "\n",
    "    loss = nn.CrossEntropyLoss()\n",
    "    batch_count = 0\n",
    "    for epoch in range(num_epochs):\n",
    "        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()\n",
    "        for idx, (X, y) in enumerate(train_iter):\n",
    "            y_hat = net(X)\n",
    "            l = loss(y_hat, y)\n",
    "            optimi.clear_grad()\n",
    "            l.backward()\n",
    "            optimi.step()\n",
    "            train_l_sum += l.numpy()[0]\n",
    "            train_acc_sum += (y_hat.argmax(axis=1) == y.flatten()).astype('float32').sum().numpy()[0]\n",
    "            n += y.shape[0]\n",
    "            batch_count += 1\n",
    "        test_acc = evaluate_accuracy(test_iter, net)\n",
    "        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\n",
    "              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))\n",
    "     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch 1, loss 0.4803, train acc 0.828, test acc 0.889, time 80.1 sec\n",
      "epoch 2, loss 0.1440, train acc 0.896, test acc 0.905, time 72.3 sec\n",
      "epoch 3, loss 0.0815, train acc 0.911, test acc 0.914, time 69.4 sec\n",
      "epoch 4, loss 0.0541, train acc 0.923, test acc 0.916, time 68.4 sec\n",
      "epoch 5, loss 0.0391, train acc 0.929, test acc 0.919, time 69.3 sec\n"
     ]
    }
   ],
   "source": [
    "batch_size = 64\n",
    "# 如出现“out of memory”的报错信息，可减小batch_size或resize\n",
    "train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)\n",
    "\n",
    "lr, num_epochs = 0.001, 5\n",
    "optimi = optimizer.Adam(parameters=net.parameters(), learning_rate=lr)\n",
    "train_ch5(net, train_iter, test_iter, batch_size, optimi, num_epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
